Biomineralization Toolkit GO EnrichmentAnalysis

sessionInfo() #provides list of loaded packages and version of R. 
## R version 4.3.2 (2023-10-31)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.0
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: America/New_York
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## loaded via a namespace (and not attached):
##  [1] digest_0.6.35     R6_2.5.1          fastmap_1.1.1     xfun_0.43        
##  [5] cachem_1.0.8      knitr_1.45        htmltools_0.5.8.1 rmarkdown_2.26   
##  [9] lifecycle_1.0.4   cli_3.6.2         sass_0.4.9        jquerylib_0.1.4  
## [13] compiler_4.3.2    rstudioapi_0.16.0 tools_4.3.2       evaluate_0.23    
## [17] bslib_0.7.0       yaml_2.3.8        rlang_1.1.3       jsonlite_1.8.8

First, load the necessary packages.

# load libraries - notes show the install command needed to install (pre installed)
library(goseq)
## Loading required package: BiasedUrn
## Loading required package: geneLenDataBase
## 
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(forcats)
library(ggplot2)
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
library(tidyr)
library(grDevices)
library(reshape2)
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
library(Rmisc)
## Loading required package: lattice
## Loading required package: plyr
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
## 
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
library(ggpubr)
## 
## Attaching package: 'ggpubr'
## The following object is masked from 'package:plyr':
## 
##     mutate
library(tibble)
library(gridExtra)
library(tidyr)
library(zoo)
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(ComplexHeatmap)
## Loading required package: grid
## ========================================
## ComplexHeatmap version 2.16.0
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
## 
## If you use it in published research, please cite either one:
## - Gu, Z. Complex Heatmap Visualization. iMeta 2022.
## - Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional 
##     genomic data. Bioinformatics 2016.
## 
## 
## The new InteractiveComplexHeatmap package can directly export static 
## complex heatmaps into an interactive Shiny app with zero effort. Have a try!
## 
## This message can be suppressed by:
##   suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================
library(circlize)
## ========================================
## circlize version 0.4.16
## CRAN page: https://cran.r-project.org/package=circlize
## Github page: https://github.com/jokergoo/circlize
## Documentation: https://jokergoo.github.io/circlize_book/book/
## 
## If you use it in published research, please cite:
## Gu, Z. circlize implements and enhances circular visualization
##   in R. Bioinformatics 2014.
## 
## This message can be suppressed by:
##   suppressPackageStartupMessages(library(circlize))
## ========================================
library(GSEABase)
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following object is masked from 'package:gridExtra':
## 
##     combine
## The following objects are masked from 'package:dplyr':
## 
##     combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
##     table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: annotate
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: IRanges
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:plyr':
## 
##     rename
## The following object is masked from 'package:tidyr':
## 
##     expand
## The following objects are masked from 'package:dplyr':
## 
##     first, rename
## The following object is masked from 'package:geneLenDataBase':
## 
##     unfactor
## The following object is masked from 'package:utils':
## 
##     findMatches
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## 
## Attaching package: 'IRanges'
## The following object is masked from 'package:plyr':
## 
##     desc
## The following objects are masked from 'package:dplyr':
## 
##     collapse, desc, slice
## 
## Attaching package: 'AnnotationDbi'
## The following object is masked from 'package:dplyr':
## 
##     select
## Loading required package: XML
## Loading required package: graph
## 
## Attaching package: 'graph'
## The following object is masked from 'package:XML':
## 
##     addNode
## The following object is masked from 'package:circlize':
## 
##     degree
## The following object is masked from 'package:plyr':
## 
##     join
library(data.table)
## 
## Attaching package: 'data.table'
## The following object is masked from 'package:IRanges':
## 
##     shift
## The following objects are masked from 'package:S4Vectors':
## 
##     first, second
## The following objects are masked from 'package:zoo':
## 
##     yearmon, yearqtr
## The following objects are masked from 'package:reshape2':
## 
##     dcast, melt
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
library(stringr)
## 
## Attaching package: 'stringr'
## The following object is masked from 'package:graph':
## 
##     boundary
library(GenomicRanges)
## Loading required package: GenomeInfoDb
library(rtracklayer)
library(rrvgo)

Biomineralization toolkit present in modules

biomin <- read.csv("../../output/Biomin_blast_Pocillopora_acuta_best_hit.csv") %>% rename("Pocillopora_acuta_best_hit" = "gene_id" )
geneInfo <- read.csv("../../output/WGCNA/GO_analysis/geneInfo_WGCNA.csv")
biomin_mod <- merge(biomin, geneInfo, by=c("gene_id"), all=F)
head(biomin_mod)
##                                     gene_id accessionnumber.geneID
## 1 Pocillopora_acuta_HIv2___RNAseq.g10093.t2         XP_022804785.1
## 2 Pocillopora_acuta_HIv2___RNAseq.g11609.t1              P33_g8985
## 3 Pocillopora_acuta_HIv2___RNAseq.g13172.t1             JR972076.1
## 4 Pocillopora_acuta_HIv2___RNAseq.g13172.t1            Gene:g13552
## 5 Pocillopora_acuta_HIv2___RNAseq.g13172.t1       aug_v2a.06327.t1
## 6 Pocillopora_acuta_HIv2___RNAseq.g13823.t1             PFX18785.1
##                                                          definition
## 1 thioredoxin reductase 1, cytoplasmic-like [Stylophora pistillata]
## 2                                      Flagellar associated protein
## 3              Acidic skeletal organic matrix protein (Acidic SOMP)
## 4                                     Acidic SOMP (Full-Length p27)
## 5                                                            SAARP3
## 6                                   Mucin-4 [Stylophora pistillata]
##                         Ref    X                           seqnames   start
## 1        Peled et al., 2020  224 Pocillopora_acuta_HIv2___Sc0000021 2329764
## 2        Drake et al., 2013  604 Pocillopora_acuta_HIv2___Sc0000013 2026351
## 3  Ramos-Silva et al., 2013 1049 Pocillopora_acuta_HIv2___Sc0000004 7860395
## 4 Mummadisetti et al., 2021 1049 Pocillopora_acuta_HIv2___Sc0000004 7860395
## 5     Takeuchi et al., 2016 1049 Pocillopora_acuta_HIv2___Sc0000004 7860395
## 6        Peled et al., 2020 1240 Pocillopora_acuta_HIv2___Sc0000005 2364120
##       end width strand   source       type score.x phase
## 1 2349234 19471      + AUGUSTUS transcript      NA    NA
## 2 2032291  5941      - AUGUSTUS transcript      NA    NA
## 3 7864189  3795      + AUGUSTUS transcript      NA    NA
## 4 7864189  3795      + AUGUSTUS transcript      NA    NA
## 5 7864189  3795      + AUGUSTUS transcript      NA    NA
## 6 2382635 18516      + AUGUSTUS transcript      NA    NA
##                                          ID
## 1 Pocillopora_acuta_HIv2___RNAseq.g10093.t2
## 2 Pocillopora_acuta_HIv2___RNAseq.g11609.t1
## 3 Pocillopora_acuta_HIv2___RNAseq.g13172.t1
## 4 Pocillopora_acuta_HIv2___RNAseq.g13172.t1
## 5 Pocillopora_acuta_HIv2___RNAseq.g13172.t1
## 6 Pocillopora_acuta_HIv2___RNAseq.g13823.t1
##                               transcript_id       seed_ortholog    evalue
## 1 Pocillopora_acuta_HIv2___RNAseq.g10093.t2      45351.EDO38228 1.51e-306
## 2 Pocillopora_acuta_HIv2___RNAseq.g11609.t1 6087.XP_002163848.1 4.11e-211
## 3 Pocillopora_acuta_HIv2___RNAseq.g13172.t1                <NA>        NA
## 4 Pocillopora_acuta_HIv2___RNAseq.g13172.t1                <NA>        NA
## 5 Pocillopora_acuta_HIv2___RNAseq.g13172.t1                <NA>        NA
## 6 Pocillopora_acuta_HIv2___RNAseq.g13823.t1                <NA>        NA
##   score.y
## 1     851
## 2     607
## 3      NA
## 4      NA
## 5      NA
## 6      NA
##                                                                                                                 eggNOG_OGs
## 1 COG0695@1|root,COG1249@1|root,KOG1752@2759|Eukaryota,KOG4716@2759|Eukaryota,38BHT@33154|Opisthokonta,3BBN5@33208|Metazoa
## 2                                           28N88@1|root,2QUTJ@2759|Eukaryota,39ZRG@33154|Opisthokonta,3BMP5@33208|Metazoa
## 3                                                                                                                     <NA>
## 4                                                                                                                     <NA>
## 5                                                                                                                     <NA>
## 6                                                                                                                     <NA>
##   max_annot_lvl COG_category                              Description
## 1 33208|Metazoa            C thioredoxin-disulfide reductase activity
## 2 33208|Metazoa            -                                        -
## 3          <NA>         <NA>                                     <NA>
## 4          <NA>         <NA>                                     <NA>
## 5          <NA>         <NA>                                     <NA>
## 6          <NA>         <NA>                                     <NA>
##   Preferred_name
## 1         TXNRD1
## 2              -
## 3           <NA>
## 4           <NA>
## 5           <NA>
## 6           <NA>
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            GOs
## 1 GO:0000003;GO:0000302;GO:0000305;GO:0001650;GO:0001704;GO:0001707;GO:0001887;GO:0001890;GO:0003006;GO:0003674;GO:0003824;GO:0004791;GO:0005488;GO:0005515;GO:0005575;GO:0005622;GO:0005623;GO:0005634;GO:0005654;GO:0005730;GO:0005737;GO:0005739;GO:0005783;GO:0005829;GO:0006082;GO:0006139;GO:0006518;GO:0006520;GO:0006575;GO:0006725;GO:0006732;GO:0006733;GO:0006739;GO:0006749;GO:0006753;GO:0006790;GO:0006793;GO:0006796;GO:0006807;GO:0006950;GO:0006979;GO:0007154;GO:0007165;GO:0007275;GO:0007369;GO:0007498;GO:0008150;GO:0008152;GO:0008283;GO:0009056;GO:0009069;GO:0009117;GO:0009611;GO:0009628;GO:0009636;GO:0009653;GO:0009790;GO:0009888;GO:0009987;GO:0010035;GO:0010038;GO:0010269;GO:0010941;GO:0010942;GO:0012505;GO:0015036;GO:0015949;GO:0016043;GO:0016174;GO:0016209;GO:0016259;GO:0016491;GO:0016651;GO:0016667;GO:0016668;GO:0016999;GO:0017001;GO:0017144;GO:0018996;GO:0019216;GO:0019222;GO:0019362;GO:0019637;GO:0019725;GO:0019752;GO:0022404;GO:0022414;GO:0022607;GO:0023052;GO:0031974;GO:0031981;GO:0032501;GO:0032502;GO:0033554;GO:0033797;GO:0034599;GO:0034641;GO:0036295;GO:0036296;GO:0036477;GO:0042221;GO:0042303;GO:0042395;GO:0042493;GO:0042537;GO:0042592;GO:0042737;GO:0042743;GO:0042744;GO:0042802;GO:0042803;GO:0043025;GO:0043167;GO:0043169;GO:0043226;GO:0043227;GO:0043228;GO:0043229;GO:0043231;GO:0043232;GO:0043233;GO:0043436;GO:0043603;GO:0043933;GO:0044085;GO:0044237;GO:0044238;GO:0044248;GO:0044281;GO:0044297;GO:0044422;GO:0044424;GO:0044428;GO:0044444;GO:0044446;GO:0044452;GO:0044464;GO:0045340;GO:0045454;GO:0046483;GO:0046496;GO:0046688;GO:0046872;GO:0046914;GO:0046983;GO:0048332;GO:0048513;GO:0048518;GO:0048522;GO:0048598;GO:0048608;GO:0048646;GO:0048678;GO:0048729;GO:0048731;GO:0048856;GO:0050664;GO:0050789;GO:0050794;GO:0050896;GO:0051186;GO:0051187;GO:0051259;GO:0051262;GO:0051716;GO:0055086;GO:0055093;GO:0055114;GO:0061458;GO:0065003;GO:0065007;GO:0065008;GO:0070013;GO:0070276;GO:0070482;GO:0070887;GO:0070995;GO:0071241;GO:0071248;GO:0071280;GO:0071453;GO:0071455;GO:0071704;GO:0071840;GO:0072524;GO:0072593;GO:0080090;GO:0097237;GO:0097458;GO:0098623;GO:0098625;GO:0098626;GO:0098754;GO:0098869;GO:1901360;GO:1901564;GO:1901605;GO:1901700;GO:1990748
## 2                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         <NA>
## 3                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         <NA>
## 4                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         <NA>
## 5                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         <NA>
## 6                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         <NA>
##        EC   KEGG_ko                                       KEGG_Pathway
## 1 1.8.1.9 ko:K22182 ko00450,ko05200,ko05225,map00450,map05200,map05225
## 2       -         -                                                  -
## 3    <NA>      <NA>                                               <NA>
## 4    <NA>      <NA>                                               <NA>
## 5    <NA>      <NA>                                               <NA>
## 6    <NA>      <NA>                                               <NA>
##   KEGG_Module KEGG_Reaction     KEGG_rclass                   BRITE KEGG_TC
## 1           - R03596,R09372 RC02518,RC02873 ko00000,ko00001,ko01000       -
## 2           -             -               -                       -       -
## 3        <NA>          <NA>            <NA>                    <NA>    <NA>
## 4        <NA>          <NA>            <NA>                    <NA>    <NA>
## 5        <NA>          <NA>            <NA>                    <NA>    <NA>
## 6        <NA>          <NA>            <NA>                    <NA>    <NA>
##   CAZy BiGG_Reaction                                  PFAMs KEGG_new
## 1    -             - Glutaredoxin,Pyr_redox_2,Pyr_redox_dim   K22182
## 2    -             -                                      -     <NA>
## 3 <NA>          <NA>                                   <NA>     <NA>
## 4 <NA>          <NA>                                   <NA>     <NA>
## 5 <NA>          <NA>                                   <NA>     <NA>
## 6 <NA>          <NA>                                   <NA>   K22017
##                                substanceBXH                         geneSymbol
## 1 Pocillopora_acuta_HIv2___RNAseq.g10093.t2 Pocillopora_acuta_HIv2___Sc0000021
## 2 Pocillopora_acuta_HIv2___RNAseq.g11609.t1 Pocillopora_acuta_HIv2___Sc0000013
## 3 Pocillopora_acuta_HIv2___RNAseq.g13172.t1 Pocillopora_acuta_HIv2___Sc0000004
## 4 Pocillopora_acuta_HIv2___RNAseq.g13172.t1 Pocillopora_acuta_HIv2___Sc0000004
## 5 Pocillopora_acuta_HIv2___RNAseq.g13172.t1 Pocillopora_acuta_HIv2___Sc0000004
## 6 Pocillopora_acuta_HIv2___RNAseq.g13823.t1 Pocillopora_acuta_HIv2___Sc0000005
##   moduleColor
## 1       brown
## 2   turquoise
## 3         red
## 4         red
## 5         red
## 6        pink
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       GO.terms
## 1 GO:0000003,GO:0000302,GO:0000305,GO:0001650,GO:0001704,GO:0001707,GO:0001887,GO:0001890,GO:0003006,GO:0003674,GO:0003824,GO:0004791,GO:0005488,GO:0005515,GO:0005575,GO:0005622,GO:0005623,GO:0005634,GO:0005654,GO:0005730,GO:0005737,GO:0005739,GO:0005783,GO:0005829,GO:0006082,GO:0006139,GO:0006518,GO:0006520,GO:0006575,GO:0006725,GO:0006732,GO:0006733,GO:0006739,GO:0006749,GO:0006753,GO:0006790,GO:0006793,GO:0006796,GO:0006807,GO:0006950,GO:0006979,GO:0007154,GO:0007165,GO:0007275,GO:0007369,GO:0007498,GO:0008150,GO:0008152,GO:0008283,GO:0009056,GO:0009069,GO:0009117,GO:0009611,GO:0009628,GO:0009636,GO:0009653,GO:0009790,GO:0009888,GO:0009987,GO:0010035,GO:0010038,GO:0010269,GO:0010941,GO:0010942,GO:0012505,GO:0015036,GO:0015949,GO:0016043,GO:0016174,GO:0016209,GO:0016259,GO:0016491,GO:0016651,GO:0016667,GO:0016668,GO:0016999,GO:0017001,GO:0017144,GO:0018996,GO:0019216,GO:0019222,GO:0019362,GO:0019637,GO:0019725,GO:0019752,GO:0022404,GO:0022414,GO:0022607,GO:0023052,GO:0031974,GO:0031981,GO:0032501,GO:0032502,GO:0033554,GO:0033797,GO:0034599,GO:0034641,GO:0036295,GO:0036296,GO:0036477,GO:0042221,GO:0042303,GO:0042395,GO:0042493,GO:0042537,GO:0042592,GO:0042737,GO:0042743,GO:0042744,GO:0042802,GO:0042803,GO:0043025,GO:0043167,GO:0043169,GO:0043226,GO:0043227,GO:0043228,GO:0043229,GO:0043231,GO:0043232,GO:0043233,GO:0043436,GO:0043603,GO:0043933,GO:0044085,GO:0044237,GO:0044238,GO:0044248,GO:0044281,GO:0044297,GO:0044422,GO:0044424,GO:0044428,GO:0044444,GO:0044446,GO:0044452,GO:0044464,GO:0045340,GO:0045454,GO:0046483,GO:0046496,GO:0046688,GO:0046872,GO:0046914,GO:0046983,GO:0048332,GO:0048513,GO:0048518,GO:0048522,GO:0048598,GO:0048608,GO:0048646,GO:0048678,GO:0048729,GO:0048731,GO:0048856,GO:0050664,GO:0050789,GO:0050794,GO:0050896,GO:0051186,GO:0051187,GO:0051259,GO:0051262,GO:0051716,GO:0055086,GO:0055093,GO:0055114,GO:0061458,GO:0065003,GO:0065007,GO:0065008,GO:0070013,GO:0070276,GO:0070482,GO:0070887,GO:0070995,GO:0071241,GO:0071248,GO:0071280,GO:0071453,GO:0071455,GO:0071704,GO:0071840,GO:0072524,GO:0072593,GO:0080090,GO:0097237,GO:0097458,GO:0098623,GO:0098625,GO:0098626,GO:0098754,GO:0098869,GO:1901360,GO:1901564,GO:1901605,GO:1901700,GO:1990748
## 2                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            -
## 3                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         <NA>
## 4                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         <NA>
## 5                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         <NA>
## 6                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         <NA>
##                             GO.description     GS.Flat    GS.Slope    p.GS.Flat
## 1 thioredoxin-disulfide reductase activity  0.57178848 -0.57178848 3.311055e-05
## 2                                        - -0.29586493  0.29586493 4.589336e-02
## 3                                     <NA>  0.35628512 -0.35628512 1.508700e-02
## 4                                     <NA>  0.35628512 -0.35628512 1.508700e-02
## 5                                     <NA>  0.35628512 -0.35628512 1.508700e-02
## 6                                     <NA> -0.05455251  0.05455251 7.187880e-01
##     p.GS.Slope    A.brown    p.A.brown  A.magenta  p.A.magenta      A.red
## 1 3.311055e-05  0.7005073 5.973619e-08 -0.3738439 1.048844e-02  0.2901298
## 2 4.589336e-02 -0.4291375 2.921081e-03  0.3115539 3.505853e-02 -0.3452015
## 3 1.508700e-02  0.4914202 5.241968e-04 -0.6288605 2.864308e-06  0.6673892
## 4 1.508700e-02  0.4914202 5.241968e-04 -0.6288605 2.864308e-06  0.6673892
## 5 1.508700e-02  0.4914202 5.241968e-04 -0.6288605 2.864308e-06  0.6673892
## 6 7.187880e-01  0.0972208 5.203783e-01 -0.3252127 2.743096e-02  0.3709019
##        p.A.red A.turquoise p.A.turquoise   A.purple   p.A.purple    A.green
## 1 5.047695e-02 -0.43323233  2.634293e-03  0.6984202 6.792759e-08  0.4574538
## 2 1.879503e-02  0.58815287  1.720729e-05 -0.1784560 2.353887e-01 -0.1306835
## 3 4.071016e-07 -0.13892006  3.571825e-01  0.1198762 4.274677e-01  0.2378899
## 4 4.071016e-07 -0.13892006  3.571825e-01  0.1198762 4.274677e-01  0.2378899
## 5 4.071016e-07 -0.13892006  3.571825e-01  0.1198762 4.274677e-01  0.2378899
## 6 1.116224e-02  0.08164806  5.895928e-01 -0.1391597 3.563456e-01  0.1614616
##     p.A.green A.lightcyan p.A.lightcyan     A.pink     p.A.pink      A.blue
## 1 0.001391986  -0.3508191  1.682948e-02  0.1707384 2.565893e-01  0.12358439
## 2 0.386672688   0.1196505  4.283449e-01 -0.1522331 3.125037e-01 -0.58598406
## 3 0.111386103  -0.6473842  1.159989e-06  0.7188738 1.835918e-08  0.07448551
## 4 0.111386103  -0.6473842  1.159989e-06  0.7188738 1.835918e-08  0.07448551
## 5 0.111386103  -0.6473842  1.159989e-06  0.7188738 1.835918e-08  0.07448551
## 6 0.283719167  -0.5276145  1.646022e-04  0.6417477 1.537006e-06 -0.02286640
##       p.A.blue  A.salmon  p.A.salmon A.midnightblue p.A.midnightblue
## 1 4.132051e-01 0.1178467 0.435389343      0.2439890       0.10224333
## 2 1.880492e-05 0.1907995 0.204028320      0.2258109       0.13131383
## 3 6.227429e-01 0.4254256 0.003204458      0.2691022       0.07053914
## 4 6.227429e-01 0.4254256 0.003204458      0.2691022       0.07053914
## 5 6.227429e-01 0.4254256 0.003204458      0.2691022       0.07053914
## 6 8.801027e-01 0.2940377 0.047315397      0.2906592       0.05003895
##       A.black  p.A.black      A.cyan  p.A.cyan    A.yellow   p.A.yellow
## 1 -0.28430645 0.05550307  0.04904562 0.7461773  0.05522073 0.7154873547
## 2 -0.18825739 0.21023361  0.07386502 0.6256510 -0.14392338 0.3399558326
## 3  0.09618758 0.52484209  0.16699226 0.2673276 -0.38010677 0.0091694889
## 4  0.09618758 0.52484209  0.16699226 0.2673276 -0.38010677 0.0091694889
## 5  0.09618758 0.52484209  0.16699226 0.2673276 -0.38010677 0.0091694889
## 6  0.02103556 0.88964060 -0.13338389 0.3768501 -0.46998526 0.0009821983
##        A.tan     p.A.tan Length
## 1  0.2648346 0.075293267  19471
## 2  0.2613466 0.079363532   5941
## 3 -0.2055446 0.170565420   3795
## 4 -0.2055446 0.170565420   3795
## 5 -0.2055446 0.170565420   3795
## 6 -0.3805358 0.009084622  18516
length(unique(biomin_mod$gene_id))
## [1] 65
plyr::count(biomin_mod, "moduleColor")
##    moduleColor freq
## 1        black    3
## 2         blue   36
## 3        brown   17
## 4         cyan    2
## 5        green    3
## 6      magenta    1
## 7         pink    7
## 8          red   18
## 9       salmon    6
## 10         tan    3
## 11   turquoise   21
## 12      yellow   10

GO term enrichment for biomineralization genes (65 unique genes, but only 22 have GO term annotation)

### Generate vector with names of all genes 
ALL.vector <- c(geneInfo$gene_id)
### Generate length vector for all genes 
LENGTH.vector <- as.integer(geneInfo$Length)
ID.vector_biomin <- biomin_mod %>%
  pull(gene_id) %>% unique()

##Get a list of GO Terms for the biomineralization genes
GO.terms_biomin <- biomin_mod %>%
  dplyr::select(gene_id, GOs) %>% unique() %>% rename(GOs = "GO.terms")

dim(GO.terms_biomin)
## [1] 65  2
dim(GO.terms_biomin %>% filter(!is.na(GO.terms)))
## [1] 22  2
## only 22/65 of these genes have GO annotations!!

##Get a list of GO Terms for the all 9012 genes
GO.terms <- geneInfo %>%
  dplyr::select(gene_id, GOs) %>% rename(GOs = "GO.terms")

dim(GO.terms)
## [1] 9012    2
dim(GO.terms %>% filter(!is.na(GO.terms)))
## [1] 4564    2
##Format to have one goterm per row with gene ID repeated
split <- strsplit(as.character(GO.terms$GO.terms), ";") 
split2 <- data.frame(v1 = rep.int(GO.terms$gene, sapply(split, length)), v2 = unlist(split)) 
colnames(split2) <- c("gene_id", "GO.terms")
GO.terms<-split2
##Construct list of genes with 1 for genes in module and 0 for genes not in the module
gene.vector = as.integer(ALL.vector %in% ID.vector_biomin) 
names(gene.vector)<-ALL.vector#set names
#weight gene vector by bias for length of gene 
pwf<-nullp(gene.vector, ID.vector, bias.data=LENGTH.vector) 
## Warning in pcls(G): initial point very close to some inequality constraints

#run goseq using Wallenius method for all categories of GO terms 
GO.wall<-goseq(pwf, ID.vector_biomin, gene2cat=GO.terms, test.cats=c("GO:BP", "GO:MF", "GO:CC"), method="Wallenius", use_genes_without_cat=TRUE)
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
GO <- GO.wall[order(GO.wall$over_represented_pvalue),]
colnames(GO)[1] <- "GOterm"
head(GO)
##           GOterm over_represented_pvalue under_represented_pvalue numDEInCat
## 4600  GO:0015701            1.109811e-08                1.0000000          5
## 3485  GO:0008510            8.000832e-07                1.0000000          3
## 4475  GO:0015106            2.473894e-06                1.0000000          3
## 4532  GO:0015301            1.300672e-05                0.9999999          3
## 1416  GO:0004064            9.657516e-05                1.0000000          2
## 14975 GO:0099516            1.405808e-04                0.9999973          3
##       numInCat                                           term ontology
## 4600        11                          bicarbonate transport       BP
## 3485         3          sodium:bicarbonate symporter activity       MF
## 4475         4 bicarbonate transmembrane transporter activity       MF
## 4532         7                                           <NA>     <NA>
## 1416         2                          arylesterase activity       MF
## 14975       12                                           <NA>     <NA>
#Filtering for p < 0.05
GO_05 <- GO %>%
        dplyr::filter(over_represented_pvalue<0.05) %>%
        dplyr::arrange(., ontology, over_represented_pvalue)
   

#Write file of results 
write.csv(GO_05, file = "../../output/WGCNA/GO_analysis/goseq_pattern_biomin.csv")
go_results <-read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_biomin.csv")
head(go_results)
##   X     GOterm over_represented_pvalue under_represented_pvalue numDEInCat
## 1 1 GO:0015701            1.109811e-08                1.0000000          5
## 2 2 GO:0044331            4.344468e-04                0.9999983          2
## 3 3 GO:0051453            8.010476e-04                0.9999684          3
## 4 4 GO:0030641            8.796110e-04                0.9999641          3
## 5 5 GO:0006885            1.025213e-03                0.9999556          3
## 6 6 GO:1902476            1.143461e-03                0.9999494          3
##   numInCat                                    term ontology
## 1       11                   bicarbonate transport       BP
## 2        3 cell-cell adhesion mediated by cadherin       BP
## 3       29          regulation of intracellular pH       BP
## 4       30               regulation of cellular pH       BP
## 5       32                        regulation of pH       BP
## 6       26        chloride transmembrane transport       BP
go_results_BP <- go_results %>%
      filter(ontology=="BP") %>%
      filter(over_represented_pvalue != "NA") %>%
      #filter(numInCat>10)%>%
      arrange(., over_represented_pvalue)

dim(go_results_BP)
## [1] 156   8
head(go_results_BP)
##   X     GOterm over_represented_pvalue under_represented_pvalue numDEInCat
## 1 1 GO:0015701            1.109811e-08                1.0000000          5
## 2 2 GO:0044331            4.344468e-04                0.9999983          2
## 3 3 GO:0051453            8.010476e-04                0.9999684          3
## 4 4 GO:0030641            8.796110e-04                0.9999641          3
## 5 5 GO:0006885            1.025213e-03                0.9999556          3
## 6 6 GO:1902476            1.143461e-03                0.9999494          3
##   numInCat                                    term ontology
## 1       11                   bicarbonate transport       BP
## 2        3 cell-cell adhesion mediated by cadherin       BP
## 3       29          regulation of intracellular pH       BP
## 4       30               regulation of cellular pH       BP
## 5       32                        regulation of pH       BP
## 6       26        chloride transmembrane transport       BP

Collapsing with simplifyEnrichment package

library(simplifyEnrichment)
BP_terms <- go_results_BP$GOterm

Mat <- GO_similarity(BP_terms, ont = "BP", db = "org.Ce.eg.db")
pdf(file = "../../output/WGCNA/GO_analysis/biomin_GOSEM.pdf", width = 7, height = 5)
simplifyGO(Mat, word_cloud_grob_param = list(max_width = 50))
dev.off()
library(rrvgo)
#Reduce/collapse GO term set with the rrvgo package 
simMatrix <- calculateSimMatrix(go_results_BP$GOterm,
                                orgdb="org.Ce.eg.db", #c. elegans database
                                ont="BP",
                                method="Rel")
## 
## preparing gene to GO mapping data...
## preparing IC data...
 #calculate similarity 
scores <- setNames(-log(go_results_BP$over_represented_pvalue), go_results_BP$GOterm)
reducedTerms <- reduceSimMatrix(simMatrix,
                                scores,
                                threshold=0.7,
                                orgdb="org.Ce.eg.db")
dim(reducedTerms)
## [1] 51 10
#keep only the goterms from the reduced list
go_results_BP_reduced <- go_results_BP %>% filter(GOterm %in% reducedTerms$go)

#add in parent terms to list of go terms 
go_results_BP_reduced$ParentTerm <- reducedTerms$parentTerm[match(go_results_BP_reduced$GOterm, reducedTerms$go)]

length(unique(go_results_BP_reduced$ParentTerm))
## [1] 15

The reduced list of terms is 51 terms that falls under 15 parent terms.

#write.csv(go_results_BP_reduced, "../../output/WGCNA/GO_analysis/Biomineralization_goterms.csv")
write.csv(go_results_BP_reduced, "../../output/WGCNA/GO_analysis/goseq_pattern_biomin_filtered.csv")

Can further reduce like so:

Further reduce by the following keywords

#add vector for terms of interest to reduce number of GO terms - NOT using this to look at individual modules for exploratory purposes
keywords<-c("metabolism", "carbon","bicarbonate", "apoptosis", "death", "symbiosis", "regulation of cell communication", "trans membrane transport", "transmembrane",  "organic substance transport", "inorganic substance transport","response to stress", "antioxidant", "calcification","biomineralization", "heat","transporters","proton transport","ion transport","acid-base", "homeostasis")

Search for these keywords in the parent terms

go_results_BP_reduced <- go_results_BP_reduced %>%
   filter(grepl(paste(keywords, collapse="|"), ParentTerm))

This is only 19 terms

#write.csv(go_results_BP_reduced, "../../output/WGCNA/GO_analysis/Biomineralization_goterms.csv")
write.csv(go_results_BP_reduced, "../../output/WGCNA/GO_analysis/goseq_pattern_biomin_filtered_keywords.csv")
#plot significantly enriched GO terms by Slim Category faceted by slim term 
 GO.plot_biomin <-  ggplot(go_results_BP_reduced, aes(x = ontology, y = term)) + 
    geom_point(aes(size=over_represented_pvalue)) + 
    scale_size(name="Over rep. p-value", trans="reverse", range=c(1,3))+
    facet_grid(ParentTerm ~ ., scales = "free", labeller = label_wrap_gen(width = 5, multi_line = TRUE))+
    theme_bw() + theme(panel.border = element_blank(), panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"),
    strip.text.y = element_text(angle=0, size = 10),
    strip.text.x = element_text(size = 20),
    axis.text = element_text(size = 8),
    axis.title.x = element_blank(),
    axis.title.y = element_blank())
GO.plot_biomin

Combining with Calcification Up and Down module info to make figure 4

Reduce the lists together

# GO_05_up <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification.csv") %>% dplyr::select(-"X")
# GO_05_down <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification_down.csv") %>% dplyr::select(-"X")
# GO_05_biomin <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_biomin.csv") %>% dplyr::select(-"X")
GO_05_up <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification_up_bymod.csv") %>% dplyr::select(-"X")
GO_05_down <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification_down_bymod.csv") %>% dplyr::select(-"X")
GO_05_biomin <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_biomin.csv") %>% dplyr::select(-"X")
go_results_BP_up <- GO_05_up %>%
      filter(ontology=="BP")%>%
      filter(over_represented_pvalue != "NA") %>%
      #filter(numInCat>10)%>%
      arrange(., over_represented_pvalue)
dim(go_results_BP_up)
## [1] 2772    8
go_results_BP_down <- GO_05_down %>%
      filter(ontology=="BP")%>%
      filter(over_represented_pvalue != "NA") %>%
      #filter(numInCat>10)%>%
      arrange(., over_represented_pvalue)
dim(go_results_BP_down)
## [1] 721   8
go_results_BP_biomin <- GO_05_biomin %>%
      filter(ontology=="BP")%>%
      filter(over_represented_pvalue != "NA") %>%
      #filter(numInCat>10)%>%
      arrange(., over_represented_pvalue)
dim(go_results_BP_biomin)
## [1] 156   7
all_enriched_terms_up_down_biomin <- c(go_results_BP_up$GOterm,go_results_BP_down$GOterm,go_results_BP_biomin$GOterm)

length(all_enriched_terms_up_down_biomin)
## [1] 3649
length(unique(all_enriched_terms_up_down_biomin))
## [1] 3342

By reducing the lists together, we can ensure that each GO term is given the same parent term whether the term was in the Up or Down modules or the biomineralization genes.

library(rrvgo)
#Reduce/collapse GO term set with the rrvgo package 
simMatrix <- calculateSimMatrix(unique(all_enriched_terms_up_down_biomin),
                                orgdb="org.Ce.eg.db", #c. elegans database
                                ont="BP",
                                method="Rel")
## preparing gene to GO mapping data...
## preparing IC data...
#calculate similarity 
reducedTerms <- reduceSimMatrix(simMatrix,
                                scores = "size",
                                threshold=0.7,
                                orgdb="org.Ce.eg.db")
## No scores provided. Falling back to term's GO size
dim(reducedTerms)
## [1] 2036   10
#keep only the goterms from the reduced list
go_results_BP_up_reduced <- go_results_BP_up %>%
  filter(GOterm %in% reducedTerms$go)

#add in parent terms to list of go terms 
go_results_BP_up_reduced$ParentTerm <- reducedTerms$parentTerm[match(go_results_BP_up_reduced$GOterm, reducedTerms$go)]

length(unique(go_results_BP_up_reduced$GOterm))
## [1] 1707
length(unique(go_results_BP_up_reduced$ParentTerm))
## [1] 147
#keep only the goterms from the reduced list
go_results_BP_down_reduced <- go_results_BP_down %>%
  filter(GOterm %in% reducedTerms$go)
 #add in parent terms to list of go terms 
go_results_BP_down_reduced$ParentTerm <- reducedTerms$parentTerm[match(go_results_BP_down_reduced$GOterm, reducedTerms$go)]

length(unique(go_results_BP_down_reduced$GOterm))
## [1] 433
length(unique(go_results_BP_down_reduced$ParentTerm))
## [1] 85
#keep only the goterms from the reduced list
go_results_BP_biomin_reduced <- go_results_BP_biomin %>%
  filter(GOterm %in% reducedTerms$go)
 #add in parent terms to list of go terms 
go_results_BP_biomin_reduced$ParentTerm <- reducedTerms$parentTerm[match(go_results_BP_biomin_reduced$GOterm, reducedTerms$go)]

length(unique(go_results_BP_biomin_reduced$GOterm))
## [1] 51
length(unique(go_results_BP_biomin_reduced$ParentTerm))
## [1] 19

Further reduce by the following keywords

#add vector for terms of interest to reduce number of GO terms - NOT using this to look at individual modules for exploratory purposes
keywords<-c("metabolism", "carbon","bicarbonate", "apoptosis", "death", "symbiosis", "regulation of cell communication", "trans membrane transport", "transmembrane",  "organic substance transport", "inorganic substance transport","response to stress", "antioxidant", "calcification","biomineralization", "heat","transporters","proton transport","ion transport","acid-base", "homeostasis")

Search for these keywords in the parent terms

go_results_BP_up_reduced_keywords <- go_results_BP_up_reduced %>%
   filter(grepl(paste(keywords, collapse="|"), ParentTerm))

length(unique(go_results_BP_up_reduced_keywords$GOterm))
## [1] 82
length(unique(go_results_BP_up_reduced_keywords$ParentTerm))
## [1] 7
go_results_BP_down_reduced_keywords <- go_results_BP_down_reduced %>%
   filter(grepl(paste(keywords, collapse="|"), ParentTerm))

length(unique(go_results_BP_down_reduced_keywords$GOterm))
## [1] 15
length(unique(go_results_BP_down_reduced_keywords$ParentTerm))
## [1] 5
go_results_BP_biomin_reduced_keywords <- go_results_BP_biomin_reduced %>%
   filter(grepl(paste(keywords, collapse="|"), ParentTerm))

length(unique(go_results_BP_biomin_reduced_keywords$GOterm))
## [1] 24
length(unique(go_results_BP_biomin_reduced_keywords$ParentTerm))
## [1] 3
#write.csv(go_results_BP_up_reduced_keywords, "../../output/WGCNA/GO_analysis/goseq_pattern_calcification_filtered.csv")
#write.csv(go_results_BP_down_reduced_keywords, "../../output/WGCNA/GO_analysis/goseq_pattern_calcification_down_filtered.csv")
#write.csv(go_results_BP_biomin_reduced_keywords, "../../output/WGCNA/GO_analysis/goseq_pattern_biomin_filtered.csv")

Won’t move forward with these for now, but they are there if we need further reduced lists.

Combining lists with factor

The following list is a list of BP-ontology (“Biological Process”) GO terms that were significantly enriched (P < 0.05) within the modules that were upregulated for calcification. The list has been further reduced by using the package rrvgo.

#cal_up_terms <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification_filtered.csv")
cal_up_terms <- go_results_BP_up_reduced

cal_up_terms <- cal_up_terms %>%
  mutate(Factor = "Up")

cal_up_terms_parent_nterm <- cal_up_terms %>%
  dplyr::group_by(ParentTerm) %>%
  dplyr::summarize(Number.of.terms = n_distinct(term)) %>%
  mutate(Calcification.direction = "Up")  %>% ungroup()

head(cal_up_terms)
##       GOterm over_represented_pvalue under_represented_pvalue numDEInCat
## 1 GO:0050794            7.575558e-23                        1        823
## 2 GO:0065007            9.035173e-22                        1        912
## 3 GO:0050789            1.200453e-20                        1        859
## 4 GO:0048522            2.736399e-18                        1        531
## 5 GO:0048518            2.733516e-17                        1        574
## 6 GO:0009987            1.373221e-16                        1       1074
##   numInCat                                      term ontology Module
## 1     2785            regulation of cellular process       BP   Blue
## 2     3184                     biological regulation       BP   Blue
## 3     2981          regulation of biological process       BP   Blue
## 4     1705   positive regulation of cellular process       BP   Blue
## 5     1888 positive regulation of biological process       BP   Blue
## 6     4026                          cellular process       BP   Blue
##                                                  ParentTerm Factor
## 1                            regulation of cellular process     Up
## 2                            regulation of cellular process     Up
## 3                            regulation of cellular process     Up
## 4 positive regulation of transcription by RNA polymerase II     Up
## 5 positive regulation of transcription by RNA polymerase II     Up
## 6                                          cellular process     Up

The following list is a list of BP-ontology (“Biological Process”) GO terms that were significantly enriched (P < 0.05) within the modules that were downregulated for calcification. The list has been further reduced by using the package rrvgo.

#cal_down_terms <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification_down_filtered.csv")
cal_down_terms <- go_results_BP_down_reduced
cal_down_terms <- cal_down_terms %>% mutate(Factor = "Down")

cal_down_terms_parent_nterm <- cal_down_terms %>%
  dplyr::group_by(ParentTerm) %>%
  dplyr::summarize(Number.of.terms = n_distinct(term)) %>%
  mutate(Calcification.direction = "Down")  %>% ungroup()

head(cal_down_terms)
##       GOterm over_represented_pvalue under_represented_pvalue numDEInCat
## 1 GO:0002181            1.561228e-16                        1         67
## 2 GO:0006412            2.328139e-16                        1        123
## 3 GO:0043043            3.132084e-15                        1        125
## 4 GO:0006518            5.575430e-15                        1        145
## 5 GO:0006614            1.285770e-14                        1         48
## 6 GO:0043603            2.192130e-14                        1        170
##   numInCat                                                        term ontology
## 1       90                                     cytoplasmic translation       BP
## 2      219                                                 translation       BP
## 3      231                                peptide biosynthetic process       BP
## 4      285                                   peptide metabolic process       BP
## 5       59 SRP-dependent cotranslational protein targeting to membrane       BP
## 6      359                                     amide metabolic process       BP
##      Module        ParentTerm Factor
## 1 Turquoise       translation   Down
## 2 Turquoise       translation   Down
## 3 Turquoise       translation   Down
## 4 Turquoise       translation   Down
## 5 Turquoise protein transport   Down
## 6 Turquoise       translation   Down

The following list is a list of BP-ontology (“Biological Process”) GO terms that were significantly enriched (P < 0.05) within the list of biomineralization-related genes. The list has been further reduced by using the package rrvgo.

#cal_biomin_terms <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_biomin_filtered.csv")
cal_biomin_terms <- go_results_BP_biomin_reduced
cal_biomin_terms <- cal_biomin_terms %>% mutate(Factor = "Biomin") %>% mutate(Module = "Biomin")
head(cal_biomin_terms)
##       GOterm over_represented_pvalue under_represented_pvalue numDEInCat
## 1 GO:0015701            1.109811e-08                1.0000000          5
## 2 GO:0051453            8.010476e-04                0.9999684          3
## 3 GO:0030641            8.796110e-04                0.9999641          3
## 4 GO:0006885            1.025213e-03                0.9999556          3
## 5 GO:1902476            1.143461e-03                0.9999494          3
## 6 GO:0006821            1.203044e-03                0.9999456          3
##   numInCat                             term ontology
## 1       11            bicarbonate transport       BP
## 2       29   regulation of intracellular pH       BP
## 3       30        regulation of cellular pH       BP
## 4       32                 regulation of pH       BP
## 5       26 chloride transmembrane transport       BP
## 6       27               chloride transport       BP
##                              ParentTerm Factor Module
## 1      regulation of dopamine secretion Biomin Biomin
## 2 intracellular calcium ion homeostasis Biomin Biomin
## 3 intracellular calcium ion homeostasis Biomin Biomin
## 4 intracellular calcium ion homeostasis Biomin Biomin
## 5              monoatomic ion transport Biomin Biomin
## 6              monoatomic ion transport Biomin Biomin

Merge up and down-regulation of calcification GOterms along with GOterms enriched in the biomineralization gene list

The following list is a list of BP-ontology (“Biological Process”) GO terms that were significantly enriched (P < 0.05) within the modules that were upregulated or downregulated for calcification or in the biomineralization gene list. The list has been further reduced by using the package rrvgo.

all_terms <- rbind(cal_down_terms,cal_up_terms,cal_biomin_terms)

all_terms <-  all_terms[,c("Factor","GOterm","over_represented_pvalue","under_represented_pvalue","numDEInCat","numInCat","term","ontology","ParentTerm","Module")]

all_terms$GOterm<-as.factor(all_terms$GOterm)

dim(all_terms) #785 reduced terms 
## [1] 2287   10
length(unique(all_terms$GOterm)) #this represents 780 unique terms between the three lists of reduced terms
## [1] 2036
length(unique(all_terms$ParentTerm)) #this represents 92 unique terms between the three lists of reduced terms
## [1] 152

Unique terms

This is collapsing the list from 2287 rows to 2036, representing the 2036 unique GO terms in the list above. It collapses the information in the columns “ParentTerm” and “Factor” so that for each GO term we get the parent terms associated and if this was enriched in modules upregulated for calcification, downregulated for calcification, or both.

goterms_shared <- all_terms %>%
  group_by(GOterm) %>%
  dplyr::summarise(
    ParentTerm = paste(unique(ParentTerm), collapse = ", "),
    Factor = paste(unique(Factor), collapse = ", "),
    term = paste(unique(term), collapse = ", ")
  )
dim(goterms_shared)
## [1] 2036    4
goterms_shared_full <- goterms_shared %>%
  left_join(dplyr::select(all_terms,GOterm, Factor, over_represented_pvalue), by = "GOterm") %>% #select 3 columns GOterm, Factor, over_represented_pvalue from all_terms and left join by GOterm, turns this dataframe from 1817 rows back to the 3453 in all_terms
  mutate(pvalue_Up = ifelse(Factor.y == "Up", over_represented_pvalue, NA)) %>% #add a column that is the p-value for the Up factor
  mutate(pvalue_Down = ifelse(Factor.y == "Down", over_represented_pvalue, NA)) %>% #add a column that is the p-value for the Down factor
  mutate(pvalue_Biomin = ifelse(Factor.y == "Biomin", over_represented_pvalue, NA)) %>% #add a column that is the p-value for the Biomin factor
  dplyr::select(-c("over_represented_pvalue", "Factor.y")) %>% #remove the unique columns so that we can collapse the dataframe
  group_by(GOterm, ParentTerm, Factor.x, term) %>% #group by the repeated columns for the non-unique GO terms
  dplyr::summarize(pvalue_Up = na.omit(pvalue_Up)[1], #carry over the p-value for the term in the up direction, by taking the first non-NA value.
            pvalue_Down = na.omit(pvalue_Down)[1], #carry over the p-value for the term in the down direction, by taking the first non-NA value.
            pvalue_Biomin = na.omit(pvalue_Biomin)[1]) %>% #carry over the p-value for the term in the biomin list, by taking the first non-NA value.
  rename(Factor.x = "Factor") #rename column 
## `summarise()` has grouped output by 'GOterm', 'ParentTerm', 'Factor.x'. You can
## override using the `.groups` argument.
write.csv(goterms_shared_full, "../../output/WGCNA/GO_analysis/Merged_GOterms_factor_ParentTerm_with_Biomin.csv")
goterms_shared_full <- read.csv("../../output/WGCNA/GO_analysis/Merged_GOterms_factor_ParentTerm_with_Biomin.csv")

calculations to check before plots

goterms_shared_full <- goterms_shared_full %>% group_by(ParentTerm) %>% mutate("N_in_Parent" = n()) %>% ungroup()

goterms_up <- goterms_shared_full %>% dplyr::filter(Factor=="Up")
goterms_down <- goterms_shared_full %>% dplyr::filter(Factor=="Down")
goterms_biomin <- goterms_shared_full %>% dplyr::filter(Factor=="Biomin")
goterms_shared_only <- goterms_shared_full %>% dplyr::filter(Factor=="Down, Up")
goterms_shared_only_biomin <- goterms_shared_full %>% dplyr::filter(Factor=="Down, Up, Biomin" | Factor=="Up, Biomin" | Factor=="Down, Biomin")

nrow(goterms_up)
## [1] 1555
nrow(goterms_down)
## [1] 294
nrow(goterms_biomin)
## [1] 32
nrow(goterms_shared_only)
## [1] 136
nrow(goterms_shared_only_biomin)
## [1] 19
nrow(goterms_up) + nrow(goterms_down) + nrow(goterms_biomin) + nrow(goterms_shared_only) + nrow(goterms_shared_only_biomin) == nrow(goterms_shared_full)
## [1] TRUE
#how many parent terms in each
length(unique(goterms_up$ParentTerm))
## [1] 144
length(unique(goterms_down$ParentTerm))
## [1] 78
length(unique(goterms_biomin$ParentTerm))
## [1] 16
length(unique(goterms_shared_only$ParentTerm))
## [1] 42
length(unique(goterms_shared_only_biomin$ParentTerm))
## [1] 7

Plot of parent terms SHARED by Up and Down modules

For each parent term in this list, how many are “Up” by calcification and “Down” by calcification, whether or not they are also enriched in the biomineralization gene list

SharedGOterms = # of GO terms within the parent term that are in the list for each of the different factors

result_parent_unique <- goterms_shared_full %>%
  group_by(ParentTerm,Factor) %>%
  dplyr::summarise(SharedGOterms = n_distinct(GOterm)) %>%
  arrange(-SharedGOterms)
## `summarise()` has grouped output by 'ParentTerm'. You can override using the
## `.groups` argument.
unique(result_parent_unique$Factor)
## [1] "Up"           "Down, Up"     "Down"         "Biomin"       "Up, Biomin"  
## [6] "Down, Biomin"

For SHARED parent terms (ones that are in Up AND Down, whether or not they are in Biomin)

parents_shared <- result_parent_unique %>%
  dplyr::filter(Factor == "Down, Up, Biomin" | Factor=="Down, Up")
 #dplyr::filter(SharedGOterms>=5) 

Calculate the percentage of shared GOterms with Biomin. for upregulation of calcification

merged_shared <- parents_shared %>%
  dplyr::group_by(ParentTerm) %>%
  mutate(
    Has_Biomin = any(Factor == "Down, Up, Biomin"),
    Sum_of_SharedGOterms = sum(SharedGOterms, na.rm = TRUE),
    Proportion.of.shared.GO.terms.with.biomineralization.genes = ifelse(
      Factor %in% c("Down, Up, Biomin"),
      SharedGOterms / Sum_of_SharedGOterms,
      0
    )
  ) %>%
  mutate(Percentage.of.shared.GO.terms.with.biomineralization.genes = Proportion.of.shared.GO.terms.with.biomineralization.genes * 100) %>%
  filter((Has_Biomin == TRUE & Factor == "Down, Up, Biomin") | (Has_Biomin == FALSE & Factor == "Down, Up"))

merged_shared_clean <- na.omit(merged_shared) #remove rows for the non-"Down, Up, Biomin" rows that don't have a percentage
head(merged_shared_clean)
## # A tibble: 6 × 7
## # Groups:   ParentTerm [6]
##   ParentTerm                Factor SharedGOterms Has_Biomin Sum_of_SharedGOterms
##   <chr>                     <chr>          <int> <lgl>                     <int>
## 1 proton motive force-driv… Down,…            25 FALSE                        25
## 2 metabolic process         Down,…            14 FALSE                        14
## 3 organic acid metabolic p… Down,…             9 FALSE                         9
## 4 ubiquitin-dependent prot… Down,…             6 FALSE                         6
## 5 nucleic acid phosphodies… Down,…             5 FALSE                         5
## 6 protein secretion         Down,…             5 FALSE                         5
## # ℹ 2 more variables:
## #   Proportion.of.shared.GO.terms.with.biomineralization.genes <dbl>,
## #   Percentage.of.shared.GO.terms.with.biomineralization.genes <dbl>

Frequency >0

cal_freq_terms_filtered_shared <- merged_shared_clean %>%
  filter(Sum_of_SharedGOterms>=0) 
head(cal_freq_terms_filtered_shared)
## # A tibble: 6 × 7
## # Groups:   ParentTerm [6]
##   ParentTerm                Factor SharedGOterms Has_Biomin Sum_of_SharedGOterms
##   <chr>                     <chr>          <int> <lgl>                     <int>
## 1 proton motive force-driv… Down,…            25 FALSE                        25
## 2 metabolic process         Down,…            14 FALSE                        14
## 3 organic acid metabolic p… Down,…             9 FALSE                         9
## 4 ubiquitin-dependent prot… Down,…             6 FALSE                         6
## 5 nucleic acid phosphodies… Down,…             5 FALSE                         5
## 6 protein secretion         Down,…             5 FALSE                         5
## # ℹ 2 more variables:
## #   Proportion.of.shared.GO.terms.with.biomineralization.genes <dbl>,
## #   Percentage.of.shared.GO.terms.with.biomineralization.genes <dbl>

Figure

freq_fig_up<-ggplot(cal_freq_terms_filtered_shared, aes(y=Sum_of_SharedGOterms,x=reorder(ParentTerm, Sum_of_SharedGOterms), fill=Percentage.of.shared.GO.terms.with.biomineralization.genes,group=1))+
  geom_point(size=5, alpha=1, pch=21,color="black")+
  geom_segment(aes(x=ParentTerm, xend=ParentTerm, y=0, yend=Sum_of_SharedGOterms)) +
  geom_hline(yintercept = 0, linetype="solid", color = 'black', size=0.5, show.legend = TRUE)+
  coord_flip()+
  scale_y_continuous(expression(GO~term~counts),limits=c(0,30))+
  scale_fill_gradientn(colours=c("white","#E6E6FA","#C9A0DC","#9370DB","#663399"), na.value = "grey98",limits = c(0, 100))+ 
  theme_classic()+
  theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=12),
        plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
        panel.background= element_rect(fill=NA, color='black'),
        legend.title = element_blank(),
        axis.text.y = element_text(vjust=0.5, size=12), #making the axis text larger 
        axis.title.x = element_blank(),#making the axis title larger 
        axis.title.y = element_blank(),
        strip.text = element_text(size=12))#making the axis title larger 
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
freq_fig_up

ggsave(filename="../../output/WGCNA/GO_analysis/freq_fig_colored_sharedUpDown.pdf", plot=freq_fig_up, dpi=300, height=15, width=10, units="in", limitsize=FALSE)

Frequency >20

cal_freq_terms_filtered_shared <- merged_shared_clean %>%
  filter(Sum_of_SharedGOterms>=20) 
head(cal_freq_terms_filtered_shared)
## # A tibble: 1 × 7
## # Groups:   ParentTerm [1]
##   ParentTerm                Factor SharedGOterms Has_Biomin Sum_of_SharedGOterms
##   <chr>                     <chr>          <int> <lgl>                     <int>
## 1 proton motive force-driv… Down,…            25 FALSE                        25
## # ℹ 2 more variables:
## #   Proportion.of.shared.GO.terms.with.biomineralization.genes <dbl>,
## #   Percentage.of.shared.GO.terms.with.biomineralization.genes <dbl>

Figure

freq_fig_up<-ggplot(cal_freq_terms_filtered_shared, aes(y=Sum_of_SharedGOterms,x=reorder(ParentTerm, Sum_of_SharedGOterms), fill=Percentage.of.shared.GO.terms.with.biomineralization.genes,group=1))+
  geom_point(size=5, alpha=1, pch=21,color="black")+
  geom_segment(aes(x=ParentTerm, xend=ParentTerm, y=0, yend=Sum_of_SharedGOterms)) +
  geom_hline(yintercept = 0, linetype="solid", color = 'black', size=0.5, show.legend = TRUE)+
  coord_flip()+
  #scale_y_continuous(expression(GO~term~counts),limits=c(0,60))+
  scale_fill_gradientn(colours=c("white","#E6E6FA","#C9A0DC","#9370DB","#663399"), na.value = "grey98",limits = c(0, 100))+ 
  theme_classic()+
  theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=12),
        plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
        panel.background= element_rect(fill=NA, color='black'),
        legend.title = element_blank(),
        axis.text.y = element_text(vjust=0.5, size=12), #making the axis text larger 
        axis.title.x = element_blank(),#making the axis title larger 
        axis.title.y = element_blank(),
        strip.text = element_text(size=12))#making the axis title larger 
freq_fig_up

ggsave(filename="../../output/WGCNA/GO_analysis/freq_fig_colored_sharedUpDown_gr20.pdf", plot=freq_fig_up, dpi=300, height=7, width=10, units="in", limitsize=FALSE)

Percentage shared >20

cal_freq_terms_filtered_shared <- merged_shared_clean %>%
  filter(Percentage.of.shared.GO.terms.with.biomineralization.genes>80) 
head(cal_freq_terms_filtered_shared)
## # A tibble: 0 × 7
## # Groups:   ParentTerm [0]
## # ℹ 7 variables: ParentTerm <chr>, Factor <chr>, SharedGOterms <int>,
## #   Has_Biomin <lgl>, Sum_of_SharedGOterms <int>,
## #   Proportion.of.shared.GO.terms.with.biomineralization.genes <dbl>,
## #   Percentage.of.shared.GO.terms.with.biomineralization.genes <dbl>

Figure

freq_fig_up<-ggplot(cal_freq_terms_filtered_shared, aes(y=Sum_of_SharedGOterms,x=reorder(ParentTerm, Percentage.of.shared.GO.terms.with.biomineralization.genes), fill=Percentage.of.shared.GO.terms.with.biomineralization.genes,group=1))+
  geom_point(size=5, alpha=1, pch=21,color="black")+
  geom_segment(aes(x=ParentTerm, xend=ParentTerm, y=0, yend=Sum_of_SharedGOterms)) +
  geom_hline(yintercept = 0, linetype="solid", color = 'black', size=0.5, show.legend = TRUE)+
  coord_flip()+
  #scale_y_continuous(expression(GO~term~counts),limits=c(0,60))+
  scale_fill_gradientn(colours=c("white","#E6E6FA","#C9A0DC","#9370DB","#663399"), na.value = "grey98",limits = c(0, 100))+ 
  theme_classic()+
  theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=12),
        plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
        panel.background= element_rect(fill=NA, color='black'),
        legend.title = element_blank(),
        axis.text.y = element_text(vjust=0.5, size=12), #making the axis text larger 
        axis.title.x = element_blank(),#making the axis title larger 
        axis.title.y = element_blank(),
        strip.text = element_text(size=12))#making the axis title larger 
freq_fig_up

ggsave(filename="../../output/WGCNA/GO_analysis/freq_fig_colored_sharedUpDown_80percentshared.pdf", plot=freq_fig_up, dpi=300, height=10, width=10, units="in", limitsize=FALSE)
freq_fig_up<-ggplot(cal_freq_terms_filtered_shared, aes(y=Sum_of_SharedGOterms,x=reorder(ParentTerm, Sum_of_SharedGOterms), fill=Percentage.of.shared.GO.terms.with.biomineralization.genes,group=1))+
  geom_point(size=5, alpha=1, pch=21,color="black")+
  geom_segment(aes(x=ParentTerm, xend=ParentTerm, y=0, yend=Sum_of_SharedGOterms)) +
  geom_hline(yintercept = 0, linetype="solid", color = 'black', size=0.5, show.legend = TRUE)+
  coord_flip()+
  #scale_y_continuous(expression(GO~term~counts),limits=c(0,60))+
  scale_fill_gradientn(colours=c("white","#E6E6FA","#C9A0DC","#9370DB","#663399"), na.value = "grey98",limits = c(0, 100))+ 
  theme_classic()+
  theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=12),
        plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
        panel.background= element_rect(fill=NA, color='black'),
        legend.title = element_blank(),
        axis.text.y = element_text(vjust=0.5, size=12), #making the axis text larger 
        axis.title.x = element_blank(),#making the axis title larger 
        axis.title.y = element_blank(),
        strip.text = element_text(size=12))#making the axis title larger 
freq_fig_up

ggsave(filename="../../output/WGCNA/GO_analysis/freq_fig_colored_sharedUpDown_80percentsharedordered.pdf", plot=freq_fig_up, dpi=300, height=10, width=10, units="in", limitsize=FALSE)

Plot of parent terms UNIQUE for Up and Down modules

For UNIQUE parent terms (ones that are only in Up or only in Down, whether or not they are in Biomin)

For UNIQUE parent terms (ones that are only in Up or only in Down, whether or not they are in Biomin)

parent_filtered_up <- result_parent_unique %>%
 dplyr::filter(Factor == "Up" | Factor=="Up, Biomin")
#dplyr::filter(SharedGOterms>=5)

# parent_filtered_up <- result_parent_unique %>%
#   dplyr::filter(Factor=="Up, Biomin")
#  #dplyr::filter(SharedGOterms>=5) 
parent_filtered_down <- result_parent_unique %>%
 dplyr::filter(Factor=="Down" | Factor=="Down, Biomin")
#dplyr::filter(SharedGOterms>=5)

# parent_filtered_down <- result_parent_unique %>%
#   dplyr::filter(Factor=="Down, Biomin")
#  #dplyr::filter(SharedGOterms>=5) 

Calculate the percentage of unique UP GO parent terms with Biomin. for upregulation of calcification

merged_up <- parent_filtered_up %>%
  dplyr::group_by(ParentTerm) %>%
  left_join(cal_up_terms_parent_nterm, by = "ParentTerm") %>%
  rename(SharedGOterms = "UniqueGOTerms") %>%
  mutate(
    Has_Biomin = any(Factor == "Up, Biomin"),
    Sum_of_UniqueGOterms = sum(UniqueGOTerms, na.rm = TRUE),
    Proportion.of.shared.GO.terms.with.biomineralization.genes = ifelse(
      Factor %in% c("Up, Biomin"),
      UniqueGOTerms / Sum_of_UniqueGOterms,
      0
    )
  ) %>%
  mutate(Percentage.of.shared.GO.terms.with.biomineralization.genes = Proportion.of.shared.GO.terms.with.biomineralization.genes * 100) %>%
  filter((Has_Biomin == TRUE & Factor == "Up, Biomin") | (Has_Biomin == FALSE & Factor == "Up"))

merged_up_clean <- na.omit(merged_up) #remove rows for the non-"Up, Biomin" rows that don't have a percentage
merged_up_clean
## # A tibble: 144 × 9
## # Groups:   ParentTerm [144]
##    ParentTerm        Factor UniqueGOTerms Number.of.terms Calcification.direct…¹
##    <chr>             <chr>          <int>           <int> <chr>                 
##  1 cell cycle        Up                57              57 Up                    
##  2 regulation of ca… Up                46              47 Up                    
##  3 regulation of pr… Up                44              47 Up                    
##  4 IRE1-mediated un… Up                41              41 Up                    
##  5 lipid metabolic … Up                41              41 Up                    
##  6 protein transport Up                38              41 Up                    
##  7 ubiquitin-depend… Up                33              39 Up                    
##  8 intracellular si… Up                31              33 Up                    
##  9 innate immune re… Up                27              28 Up                    
## 10 mitochondrion or… Up                25              25 Up                    
## # ℹ 134 more rows
## # ℹ abbreviated name: ¹​Calcification.direction
## # ℹ 4 more variables: Has_Biomin <lgl>, Sum_of_UniqueGOterms <int>,
## #   Proportion.of.shared.GO.terms.with.biomineralization.genes <dbl>,
## #   Percentage.of.shared.GO.terms.with.biomineralization.genes <dbl>

Calculate the percentage of shared GOterms downregulation of calcification

merged_down <- parent_filtered_down %>%
  dplyr::group_by(ParentTerm) %>%
  left_join(cal_down_terms_parent_nterm, by = "ParentTerm") %>%
  rename(SharedGOterms = "UniqueGOTerms") %>%
  mutate(
    Has_Biomin = any(Factor == "Down, Biomin"),
    Sum_of_UniqueGOterms = sum(UniqueGOTerms, na.rm = TRUE),
    Proportion.of.shared.GO.terms.with.biomineralization.genes = ifelse(
      Factor %in% c("Down, Biomin"),
      UniqueGOTerms / Sum_of_UniqueGOterms,
      0
    )
  ) %>%
  mutate(Percentage.of.shared.GO.terms.with.biomineralization.genes = Proportion.of.shared.GO.terms.with.biomineralization.genes * 100) %>%
  filter((Has_Biomin == TRUE & Factor == "Down, Biomin") | (Has_Biomin == FALSE & Factor == "Down"))

merged_down_clean <- na.omit(merged_down) #remove rows for the non-"Down, Biomin" rows that don't have a percentage
merged_down_clean
## # A tibble: 79 × 9
## # Groups:   ParentTerm [79]
##    ParentTerm        Factor UniqueGOTerms Number.of.terms Calcification.direct…¹
##    <chr>             <chr>          <int>           <int> <chr>                 
##  1 ubiquitin-depend… Down              20              26 Down                  
##  2 peptidyl-serine … Down              14              16 Down                  
##  3 protein transport Down              13              16 Down                  
##  4 regulation of tr… Down              12              17 Down                  
##  5 reproduction      Down              11              13 Down                  
##  6 fatty acid metab… Down              10              12 Down                  
##  7 intracellular si… Down              10              12 Down                  
##  8 negative regulat… Down              10              12 Down                  
##  9 regulation of pr… Down              10              13 Down                  
## 10 ribosome biogene… Down               9               9 Down                  
## # ℹ 69 more rows
## # ℹ abbreviated name: ¹​Calcification.direction
## # ℹ 4 more variables: Has_Biomin <lgl>, Sum_of_UniqueGOterms <int>,
## #   Proportion.of.shared.GO.terms.with.biomineralization.genes <dbl>,
## #   Percentage.of.shared.GO.terms.with.biomineralization.genes <dbl>

Frequency all up

cal_freq_terms_filtered_up <- merged_up_clean %>%
  #filter(Number.of.terms>=10) %>%
  filter(Calcification.direction=="Up")
cal_freq_terms_filtered_up
## # A tibble: 144 × 9
## # Groups:   ParentTerm [144]
##    ParentTerm        Factor UniqueGOTerms Number.of.terms Calcification.direct…¹
##    <chr>             <chr>          <int>           <int> <chr>                 
##  1 cell cycle        Up                57              57 Up                    
##  2 regulation of ca… Up                46              47 Up                    
##  3 regulation of pr… Up                44              47 Up                    
##  4 IRE1-mediated un… Up                41              41 Up                    
##  5 lipid metabolic … Up                41              41 Up                    
##  6 protein transport Up                38              41 Up                    
##  7 ubiquitin-depend… Up                33              39 Up                    
##  8 intracellular si… Up                31              33 Up                    
##  9 innate immune re… Up                27              28 Up                    
## 10 mitochondrion or… Up                25              25 Up                    
## # ℹ 134 more rows
## # ℹ abbreviated name: ¹​Calcification.direction
## # ℹ 4 more variables: Has_Biomin <lgl>, Sum_of_UniqueGOterms <int>,
## #   Proportion.of.shared.GO.terms.with.biomineralization.genes <dbl>,
## #   Percentage.of.shared.GO.terms.with.biomineralization.genes <dbl>

Figure

freq_fig_up<-ggplot(cal_freq_terms_filtered_up, aes(y=Sum_of_UniqueGOterms,x=reorder(ParentTerm, Sum_of_UniqueGOterms), fill=Percentage.of.shared.GO.terms.with.biomineralization.genes,group=1))+
  geom_point(size=5, alpha=1, pch=21,color="black")+
  geom_segment(aes(x=ParentTerm, xend=ParentTerm, y=0, yend=Sum_of_UniqueGOterms)) +
  geom_hline(yintercept = 0, linetype="solid", color = 'black', size=0.5, show.legend = TRUE)+
  coord_flip()+
  #scale_y_continuous(expression(GO~term~counts),limits=c(0,15))+
  scale_fill_gradientn(colours=c("white","#fddbc7","#f4a582","#d6604d","#b2182b"), na.value = "grey98",limits = c(0, 100))+ 
  theme_classic()+
  theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=12),
        plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
        panel.background= element_rect(fill=NA, color='black'),
        legend.title = element_blank(),
        axis.text.y = element_text(vjust=0.5, size=12), #making the axis text larger 
        axis.title.x = element_blank(),#making the axis title larger 
        axis.title.y = element_blank(),
        strip.text = element_text(size=12)) #making the axis title larger 
freq_fig_up

ggsave(filename="../../output/WGCNA/GO_analysis/freq_fig_up_colored_unique.pdf", plot=freq_fig_up, dpi=300, height=15, width=10, units="in", limitsize=FALSE)

Frequency all down

cal_freq_terms_filtered_down <- merged_down_clean %>%
  #filter(Number.of.terms>=10) %>%
  filter(Calcification.direction=="Down")
cal_freq_terms_filtered_down
## # A tibble: 79 × 9
## # Groups:   ParentTerm [79]
##    ParentTerm        Factor UniqueGOTerms Number.of.terms Calcification.direct…¹
##    <chr>             <chr>          <int>           <int> <chr>                 
##  1 ubiquitin-depend… Down              20              26 Down                  
##  2 peptidyl-serine … Down              14              16 Down                  
##  3 protein transport Down              13              16 Down                  
##  4 regulation of tr… Down              12              17 Down                  
##  5 reproduction      Down              11              13 Down                  
##  6 fatty acid metab… Down              10              12 Down                  
##  7 intracellular si… Down              10              12 Down                  
##  8 negative regulat… Down              10              12 Down                  
##  9 regulation of pr… Down              10              13 Down                  
## 10 ribosome biogene… Down               9               9 Down                  
## # ℹ 69 more rows
## # ℹ abbreviated name: ¹​Calcification.direction
## # ℹ 4 more variables: Has_Biomin <lgl>, Sum_of_UniqueGOterms <int>,
## #   Proportion.of.shared.GO.terms.with.biomineralization.genes <dbl>,
## #   Percentage.of.shared.GO.terms.with.biomineralization.genes <dbl>

Figure

freq_fig_down<-ggplot(cal_freq_terms_filtered_down, aes(y=Sum_of_UniqueGOterms,x=reorder(ParentTerm, Sum_of_UniqueGOterms), fill=Percentage.of.shared.GO.terms.with.biomineralization.genes,group=1))+
  geom_point(size=5, alpha=1, pch=21,color="black")+
  geom_segment(aes(x=ParentTerm, xend=ParentTerm, y=0, yend=Sum_of_UniqueGOterms)) +
  geom_hline(yintercept = 0, linetype="solid", color = 'black', size=0.5, show.legend = TRUE)+
  coord_flip()+
  #scale_y_continuous(expression(GO~term~counts),limits=c(0,20))+
  scale_fill_gradientn(colours=c("white","#d1e5f0","#92c5de","#4393c3","#2166ac"), na.value = "grey98",limits = c(0, 100))+ 
  theme_classic()+
  theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=12),
        plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
        panel.background= element_rect(fill=NA, color='black'),
        legend.title = element_blank(),
        axis.text.y = element_text(vjust=0.5, size=12), #making the axis text larger 
        axis.title.x = element_blank(),#making the axis title larger 
        axis.title.y = element_blank(),
        strip.text = element_text(size=12))#making the axis title larger 
freq_fig_down

ggsave(filename="../../output/WGCNA/GO_analysis/freq_fig_down_colored_unique.pdf", plot=freq_fig_down, dpi=300, height=15, width=10, units="in", limitsize=FALSE)
compare_figs<-cowplot::plot_grid(freq_fig_up, freq_fig_down, nrow=2, align="v")
compare_figs

Percentage up >0

cal_freq_terms_filtered_up <- merged_up_clean %>%
  filter(Percentage.of.shared.GO.terms.with.biomineralization.genes>0) %>%
  filter(Calcification.direction=="Up")
cal_freq_terms_filtered_up
## # A tibble: 7 × 9
## # Groups:   ParentTerm [7]
##   ParentTerm         Factor UniqueGOTerms Number.of.terms Calcification.direct…¹
##   <chr>              <chr>          <int>           <int> <chr>                 
## 1 intracellular cal… Up, B…             5              24 Up                    
## 2 monoatomic ion tr… Up, B…             5              13 Up                    
## 3 proton transmembr… Up, B…             2               8 Up                    
## 4 ammonium ion meta… Up, B…             1               2 Up                    
## 5 intracellular tra… Up, B…             1               7 Up                    
## 6 regulation of dop… Up, B…             1              11 Up                    
## 7 regulation of pro… Up, B…             1              26 Up                    
## # ℹ abbreviated name: ¹​Calcification.direction
## # ℹ 4 more variables: Has_Biomin <lgl>, Sum_of_UniqueGOterms <int>,
## #   Proportion.of.shared.GO.terms.with.biomineralization.genes <dbl>,
## #   Percentage.of.shared.GO.terms.with.biomineralization.genes <dbl>

Figure

freq_fig_up<-ggplot(cal_freq_terms_filtered_up, aes(y=Sum_of_UniqueGOterms,x=reorder(ParentTerm, Sum_of_UniqueGOterms), fill=Percentage.of.shared.GO.terms.with.biomineralization.genes,group=1))+
  geom_point(size=5, alpha=1, pch=21,color="black")+
  geom_segment(aes(x=ParentTerm, xend=ParentTerm, y=0, yend=Sum_of_UniqueGOterms)) +
  geom_hline(yintercept = 0, linetype="solid", color = 'black', size=0.5, show.legend = TRUE)+
  coord_flip()+
  #scale_y_continuous(expression(GO~term~counts),limits=c(0,15))+
  scale_fill_gradientn(colours=c("white","#fddbc7","#f4a582","#d6604d","#b2182b"), na.value = "grey98",limits = c(0, 100))+ 
  theme_classic()+
  theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=12),
        plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
        panel.background= element_rect(fill=NA, color='black'),
        legend.title = element_blank(),
        axis.text.y = element_text(vjust=0.5, size=12), #making the axis text larger 
        axis.title.x = element_blank(),#making the axis title larger 
        axis.title.y = element_blank(),
        strip.text = element_text(size=12)) #making the axis title larger 
freq_fig_up

ggsave(filename="../../output/WGCNA/GO_analysis/freq_fig_up_colored_unique_biomin.pdf", plot=freq_fig_up, dpi=300, height=5, width=10, units="in", limitsize=FALSE)

Frequency >0 down

cal_freq_terms_filtered_down <- merged_down_clean %>%
  filter(Percentage.of.shared.GO.terms.with.biomineralization.genes>0) %>%
  filter(Calcification.direction=="Down")
cal_freq_terms_filtered_down
## # A tibble: 2 × 9
## # Groups:   ParentTerm [2]
##   ParentTerm         Factor UniqueGOTerms Number.of.terms Calcification.direct…¹
##   <chr>              <chr>          <int>           <int> <chr>                 
## 1 monoatomic ion tr… Down,…             2               4 Down                  
## 2 regulation of pro… Down,…             1               8 Down                  
## # ℹ abbreviated name: ¹​Calcification.direction
## # ℹ 4 more variables: Has_Biomin <lgl>, Sum_of_UniqueGOterms <int>,
## #   Proportion.of.shared.GO.terms.with.biomineralization.genes <dbl>,
## #   Percentage.of.shared.GO.terms.with.biomineralization.genes <dbl>

Figure

freq_fig_down<-ggplot(cal_freq_terms_filtered_down, aes(y=Sum_of_UniqueGOterms,x=reorder(ParentTerm, Sum_of_UniqueGOterms), fill=Percentage.of.shared.GO.terms.with.biomineralization.genes,group=1))+
  geom_point(size=5, alpha=1, pch=21,color="black")+
  geom_segment(aes(x=ParentTerm, xend=ParentTerm, y=0, yend=Sum_of_UniqueGOterms)) +
  geom_hline(yintercept = 0, linetype="solid", color = 'black', size=0.5, show.legend = TRUE)+
  coord_flip()+
  #scale_y_continuous(expression(GO~term~counts),limits=c(0,20))+
  scale_fill_gradientn(colours=c("white","#d1e5f0","#92c5de","#4393c3","#2166ac"), na.value = "grey98",limits = c(0, 100))+ 
  theme_classic()+
  theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=12),
        plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
        panel.background= element_rect(fill=NA, color='black'),
        legend.title = element_blank(),
        axis.text.y = element_text(vjust=0.5, size=12), #making the axis text larger 
        axis.title.x = element_blank(),#making the axis title larger 
        axis.title.y = element_blank(),
        strip.text = element_text(size=12))#making the axis title larger 
freq_fig_down

ggsave(filename="../../output/WGCNA/GO_analysis/freq_fig_down_colored_unique_biomin.pdf", plot=freq_fig_down, dpi=300, height=5, width=10, units="in", limitsize=FALSE)
compare_figs<-cowplot::plot_grid(freq_fig_up, freq_fig_down, nrow=2, align="v")
compare_figs

Plot of parent terms SHARED and UNIQUE for Up and Down modules

For SHARED and UNIQUE parent terms

# parent_filtered_up <- result_parent_unique %>%
#   dplyr::filter(Factor=="Up, Biomin" | Factor == "Down, Up, Biomin")
#  #dplyr::filter(SharedGOterms>=5) 

parent_filtered_up <- result_parent_unique %>%
  dplyr::filter(Factor == "Up" | Factor=="Up, Biomin" | Factor == "Down, Up, Biomin" | Factor=="Down, Up")
 #dplyr::filter(SharedGOterms>=5) 
# parent_filtered_down <- result_parent_unique %>%
#   dplyr::filter(Factor=="Down, Biomin" | Factor == "Down, Up, Biomin")
#  #dplyr::filter(SharedGOterms>=5) 

parent_filtered_down <- result_parent_unique %>%
  dplyr::filter(Factor == "Down" | Factor=="Down, Biomin" | Factor == "Down, Up, Biomin" | Factor=="Down, Up")
 #dplyr::filter(SharedGOterms>=5) 

Calculate the percentage of UP GO parent terms with Biomin. for upregulation of calcification

merged_up <- parent_filtered_up %>%
  dplyr::group_by(ParentTerm) %>%
  left_join(cal_up_terms_parent_nterm, by = "ParentTerm") %>%
  rename(SharedGOterms = "UpGOTerms") %>%
  mutate(
    Has_Biomin = any(Factor %in% c("Up, Biomin", "Down, Up, Biomin")),
    Sum_of_UpGOterms = sum(UpGOTerms, na.rm = TRUE),
    Proportion.of.shared.GO.terms.with.biomineralization.genes = case_when(
      Factor %in% c("Up, Biomin", "Down, Up, Biomin") ~ UpGOTerms / Sum_of_UpGOterms,
      TRUE ~ 0
    )
  ) %>%
  mutate(Percentage.of.shared.GO.terms.with.biomineralization.genes = Proportion.of.shared.GO.terms.with.biomineralization.genes * 100) %>%
  filter((Has_Biomin == TRUE & Factor == "Up, Biomin") | (Has_Biomin == TRUE & Factor == "Down, Up, Biomin") | (Has_Biomin == FALSE & Factor == "Up") | (Has_Biomin == FALSE & Factor == "Down, Up")) %>% 
  dplyr::group_by(ParentTerm) %>% 
  mutate(
    UpGOTerms = sum(UpGOTerms, na.rm = TRUE),
    Percentage.of.shared.GO.terms.with.biomineralization.genes = sum(Percentage.of.shared.GO.terms.with.biomineralization.genes, na.rm = TRUE),
    Proportion.of.shared.GO.terms.with.biomineralization.genes = sum(Proportion.of.shared.GO.terms.with.biomineralization.genes, na.rm = TRUE)
  ) %>%
  filter(row_number() == 1)

merged_up_clean <- na.omit(merged_up) #remove rows for the non-"Up, Biomin" rows that don't have a percentage
merged_up_clean
## # A tibble: 147 × 9
## # Groups:   ParentTerm [147]
##    ParentTerm Factor UpGOTerms Number.of.terms Calcification.direct…¹ Has_Biomin
##    <chr>      <chr>      <int>           <int> <chr>                  <lgl>     
##  1 cell cycle Up            57              57 Up                     FALSE     
##  2 regulatio… Up            47              47 Up                     FALSE     
##  3 regulatio… Up            47              47 Up                     FALSE     
##  4 IRE1-medi… Up            41              41 Up                     FALSE     
##  5 lipid met… Up            41              41 Up                     FALSE     
##  6 protein t… Up            41              41 Up                     FALSE     
##  7 ubiquitin… Up            39              39 Up                     FALSE     
##  8 intracell… Up            33              33 Up                     FALSE     
##  9 innate im… Up            28              28 Up                     FALSE     
## 10 mitochond… Up            25              25 Up                     FALSE     
## # ℹ 137 more rows
## # ℹ abbreviated name: ¹​Calcification.direction
## # ℹ 3 more variables: Sum_of_UpGOterms <int>,
## #   Proportion.of.shared.GO.terms.with.biomineralization.genes <dbl>,
## #   Percentage.of.shared.GO.terms.with.biomineralization.genes <dbl>

Calculate the percentage of Down GO parent terms with Biomin. for upregulation of calcification

merged_down <- parent_filtered_down %>%
  dplyr::group_by(ParentTerm) %>%
  left_join(cal_down_terms_parent_nterm, by = "ParentTerm") %>%
  rename(SharedGOterms = "DownGOTerms") %>%
  mutate(
    Has_Biomin = any(Factor %in% c("Down, Biomin", "Down, Up, Biomin")),
    Sum_of_DownGOterms = sum(DownGOTerms, na.rm = TRUE),
    Proportion.of.shared.GO.terms.with.biomineralization.genes = case_when(
      Factor %in% c("Down, Biomin", "Down, Up, Biomin") ~ DownGOTerms / Sum_of_DownGOterms,
      TRUE ~ 0
    )
  ) %>%
  mutate(Percentage.of.shared.GO.terms.with.biomineralization.genes = Proportion.of.shared.GO.terms.with.biomineralization.genes * 100) %>%
  filter((Has_Biomin == TRUE & Factor == "Down, Biomin") | (Has_Biomin == TRUE & Factor == "Down, Up, Biomin") | (Has_Biomin == FALSE & Factor == "Down") | (Has_Biomin == FALSE & Factor == "Down, Up")) %>% 
  dplyr::group_by(ParentTerm) %>% 
  mutate(
    DownGOTerms = sum(DownGOTerms, na.rm = TRUE),
    Percentage.of.shared.GO.terms.with.biomineralization.genes = sum(Percentage.of.shared.GO.terms.with.biomineralization.genes, na.rm = TRUE),
    Proportion.of.shared.GO.terms.with.biomineralization.genes = sum(Proportion.of.shared.GO.terms.with.biomineralization.genes, na.rm = TRUE)
  ) %>%
  filter(row_number() == 1)

merged_down_clean <- na.omit(merged_down) #remove rows for the non-"Down, Biomin" rows that don't have a percentage
merged_down_clean
## # A tibble: 85 × 9
## # Groups:   ParentTerm [85]
##    ParentTerm          Factor DownGOTerms Number.of.terms Calcification.direct…¹
##    <chr>               <chr>        <int>           <int> <chr>                 
##  1 proton motive forc… Down,…          26              26 Down                  
##  2 ubiquitin-dependen… Down            26              26 Down                  
##  3 metabolic process   Down,…          15              15 Down                  
##  4 peptidyl-serine ph… Down            16              16 Down                  
##  5 protein transport   Down            16              16 Down                  
##  6 regulation of tran… Down            17              17 Down                  
##  7 reproduction        Down            13              13 Down                  
##  8 fatty acid metabol… Down            12              12 Down                  
##  9 intracellular sign… Down            12              12 Down                  
## 10 negative regulatio… Down            12              12 Down                  
## # ℹ 75 more rows
## # ℹ abbreviated name: ¹​Calcification.direction
## # ℹ 4 more variables: Has_Biomin <lgl>, Sum_of_DownGOterms <int>,
## #   Proportion.of.shared.GO.terms.with.biomineralization.genes <dbl>,
## #   Percentage.of.shared.GO.terms.with.biomineralization.genes <dbl>

Frequency up

cal_freq_terms_filtered_up <- merged_up_clean %>%
  #filter(Number.of.terms>=20) %>%
  filter(Calcification.direction=="Up")
cal_freq_terms_filtered_up
## # A tibble: 147 × 9
## # Groups:   ParentTerm [147]
##    ParentTerm Factor UpGOTerms Number.of.terms Calcification.direct…¹ Has_Biomin
##    <chr>      <chr>      <int>           <int> <chr>                  <lgl>     
##  1 cell cycle Up            57              57 Up                     FALSE     
##  2 regulatio… Up            47              47 Up                     FALSE     
##  3 regulatio… Up            47              47 Up                     FALSE     
##  4 IRE1-medi… Up            41              41 Up                     FALSE     
##  5 lipid met… Up            41              41 Up                     FALSE     
##  6 protein t… Up            41              41 Up                     FALSE     
##  7 ubiquitin… Up            39              39 Up                     FALSE     
##  8 intracell… Up            33              33 Up                     FALSE     
##  9 innate im… Up            28              28 Up                     FALSE     
## 10 mitochond… Up            25              25 Up                     FALSE     
## # ℹ 137 more rows
## # ℹ abbreviated name: ¹​Calcification.direction
## # ℹ 3 more variables: Sum_of_UpGOterms <int>,
## #   Proportion.of.shared.GO.terms.with.biomineralization.genes <dbl>,
## #   Percentage.of.shared.GO.terms.with.biomineralization.genes <dbl>

Figure

freq_fig_up<-ggplot(cal_freq_terms_filtered_up, aes(y=Number.of.terms,x=reorder(ParentTerm, Number.of.terms), fill=Percentage.of.shared.GO.terms.with.biomineralization.genes,group=1))+
  geom_point(size=5, alpha=1, pch=21,color="black")+
  geom_segment(aes(x=ParentTerm, xend=ParentTerm, y=0, yend=Number.of.terms)) +
  geom_hline(yintercept = 0, linetype="solid", color = 'black', size=0.5, show.legend = TRUE)+
  coord_flip()+
  #scale_y_continuous(expression(GO~term~counts),limits=c(0,15))+
  scale_fill_gradientn(colours=c("white","#fddbc7","#f4a582","#d6604d","#b2182b"), na.value = "grey98",limits = c(0, 100))+ 
  theme_classic()+
  theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=12),
        plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
        panel.background= element_rect(fill=NA, color='black'),
        legend.title = element_blank(),
        axis.text.y = element_text(vjust=0.5, size=12), #making the axis text larger 
        axis.title.x = element_blank(),#making the axis title larger 
        axis.title.y = element_blank(),
        strip.text = element_text(size=12)) #making the axis title larger 
freq_fig_up

ggsave(filename="../../output/WGCNA/GO_analysis/freq_fig_up_colored_all.pdf", plot=freq_fig_up, dpi=300, height=25, width=10, units="in", limitsize=FALSE)

Frequency down

cal_freq_terms_filtered_down <- merged_down_clean %>%
  #filter(Number.of.terms>=20) %>%
  filter(Calcification.direction=="Down")
cal_freq_terms_filtered_down
## # A tibble: 85 × 9
## # Groups:   ParentTerm [85]
##    ParentTerm          Factor DownGOTerms Number.of.terms Calcification.direct…¹
##    <chr>               <chr>        <int>           <int> <chr>                 
##  1 proton motive forc… Down,…          26              26 Down                  
##  2 ubiquitin-dependen… Down            26              26 Down                  
##  3 metabolic process   Down,…          15              15 Down                  
##  4 peptidyl-serine ph… Down            16              16 Down                  
##  5 protein transport   Down            16              16 Down                  
##  6 regulation of tran… Down            17              17 Down                  
##  7 reproduction        Down            13              13 Down                  
##  8 fatty acid metabol… Down            12              12 Down                  
##  9 intracellular sign… Down            12              12 Down                  
## 10 negative regulatio… Down            12              12 Down                  
## # ℹ 75 more rows
## # ℹ abbreviated name: ¹​Calcification.direction
## # ℹ 4 more variables: Has_Biomin <lgl>, Sum_of_DownGOterms <int>,
## #   Proportion.of.shared.GO.terms.with.biomineralization.genes <dbl>,
## #   Percentage.of.shared.GO.terms.with.biomineralization.genes <dbl>

Figure

freq_fig_down<-ggplot(cal_freq_terms_filtered_down, aes(y=Number.of.terms,x=reorder(ParentTerm, Number.of.terms), fill=Percentage.of.shared.GO.terms.with.biomineralization.genes,group=1))+
  geom_point(size=5, alpha=1, pch=21,color="black")+
  geom_segment(aes(x=ParentTerm, xend=ParentTerm, y=0, yend=Number.of.terms)) +
  geom_hline(yintercept = 0, linetype="solid", color = 'black', size=0.5, show.legend = TRUE)+
  coord_flip()+
  #scale_y_continuous(expression(GO~term~counts),limits=c(0,20))+
  scale_fill_gradientn(colours=c("white","#d1e5f0","#92c5de","#4393c3","#2166ac"), na.value = "grey98",limits = c(0, 100))+ 
  theme_classic()+
  theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=12),
        plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
        panel.background= element_rect(fill=NA, color='black'),
        legend.title = element_blank(),
        axis.text.y = element_text(vjust=0.5, size=12), #making the axis text larger 
        axis.title.x = element_blank(),#making the axis title larger 
        axis.title.y = element_blank(),
        strip.text = element_text(size=12))#making the axis title larger 
freq_fig_down

ggsave(filename="../../output/WGCNA/GO_analysis/freq_fig_down_colored_all.pdf", plot=freq_fig_down, dpi=300, height=25, width=10, units="in", limitsize=FALSE)

Frequency >20 up

cal_freq_terms_filtered_up <- merged_up_clean %>%
  filter(Number.of.terms>=20) %>%
  filter(Calcification.direction=="Up")
cal_freq_terms_filtered_up
## # A tibble: 25 × 9
## # Groups:   ParentTerm [25]
##    ParentTerm Factor UpGOTerms Number.of.terms Calcification.direct…¹ Has_Biomin
##    <chr>      <chr>      <int>           <int> <chr>                  <lgl>     
##  1 cell cycle Up            57              57 Up                     FALSE     
##  2 regulatio… Up            47              47 Up                     FALSE     
##  3 regulatio… Up            47              47 Up                     FALSE     
##  4 IRE1-medi… Up            41              41 Up                     FALSE     
##  5 lipid met… Up            41              41 Up                     FALSE     
##  6 protein t… Up            41              41 Up                     FALSE     
##  7 ubiquitin… Up            39              39 Up                     FALSE     
##  8 intracell… Up            33              33 Up                     FALSE     
##  9 innate im… Up            28              28 Up                     FALSE     
## 10 mitochond… Up            25              25 Up                     FALSE     
## # ℹ 15 more rows
## # ℹ abbreviated name: ¹​Calcification.direction
## # ℹ 3 more variables: Sum_of_UpGOterms <int>,
## #   Proportion.of.shared.GO.terms.with.biomineralization.genes <dbl>,
## #   Percentage.of.shared.GO.terms.with.biomineralization.genes <dbl>

Figure

freq_fig_up<-ggplot(cal_freq_terms_filtered_up, aes(y=Number.of.terms,x=reorder(ParentTerm, Number.of.terms), fill=Percentage.of.shared.GO.terms.with.biomineralization.genes,group=1))+
  geom_point(size=5, alpha=1, pch=21,color="black")+
  geom_segment(aes(x=ParentTerm, xend=ParentTerm, y=0, yend=Number.of.terms)) +
  geom_hline(yintercept = 0, linetype="solid", color = 'black', size=0.5, show.legend = TRUE)+
  coord_flip()+
  #scale_y_continuous(expression(GO~term~counts),limits=c(0,15))+
  scale_fill_gradientn(colours=c("white","#fddbc7","#f4a582","#d6604d","#b2182b"), na.value = "grey98",limits = c(0, 100))+ 
  theme_classic()+
  theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=12),
        plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
        panel.background= element_rect(fill=NA, color='black'),
        legend.title = element_blank(),
        axis.text.y = element_text(vjust=0.5, size=12), #making the axis text larger 
        axis.title.x = element_blank(),#making the axis title larger 
        axis.title.y = element_blank(),
        strip.text = element_text(size=12)) #making the axis title larger 
freq_fig_up

ggsave(filename="../../output/WGCNA/GO_analysis/freq_fig_up_colored_all_gr20.pdf", plot=freq_fig_up, dpi=300, height=10, width=10, units="in", limitsize=FALSE)

Frequency >20 down

cal_freq_terms_filtered_down <- merged_down_clean %>%
  filter(Number.of.terms>=20) %>%
  filter(Calcification.direction=="Down")
cal_freq_terms_filtered_down
## # A tibble: 2 × 9
## # Groups:   ParentTerm [2]
##   ParentTerm           Factor DownGOTerms Number.of.terms Calcification.direct…¹
##   <chr>                <chr>        <int>           <int> <chr>                 
## 1 proton motive force… Down,…          26              26 Down                  
## 2 ubiquitin-dependent… Down            26              26 Down                  
## # ℹ abbreviated name: ¹​Calcification.direction
## # ℹ 4 more variables: Has_Biomin <lgl>, Sum_of_DownGOterms <int>,
## #   Proportion.of.shared.GO.terms.with.biomineralization.genes <dbl>,
## #   Percentage.of.shared.GO.terms.with.biomineralization.genes <dbl>

Figure

freq_fig_down<-ggplot(cal_freq_terms_filtered_down, aes(y=Number.of.terms,x=reorder(ParentTerm, Number.of.terms), fill=Percentage.of.shared.GO.terms.with.biomineralization.genes,group=1))+
  geom_point(size=5, alpha=1, pch=21,color="black")+
  geom_segment(aes(x=ParentTerm, xend=ParentTerm, y=0, yend=Number.of.terms)) +
  geom_hline(yintercept = 0, linetype="solid", color = 'black', size=0.5, show.legend = TRUE)+
  coord_flip()+
  #scale_y_continuous(expression(GO~term~counts),limits=c(0,20))+
  scale_fill_gradientn(colours=c("white","#d1e5f0","#92c5de","#4393c3","#2166ac"), na.value = "grey98",limits = c(0, 100))+ 
  theme_classic()+
  theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=12),
        plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
        panel.background= element_rect(fill=NA, color='black'),
        legend.title = element_blank(),
        axis.text.y = element_text(vjust=0.5, size=12), #making the axis text larger 
        axis.title.x = element_blank(),#making the axis title larger 
        axis.title.y = element_blank(),
        strip.text = element_text(size=12))#making the axis title larger 
freq_fig_down

ggsave(filename="../../output/WGCNA/GO_analysis/freq_fig_down_colored_all_gr20.pdf", plot=freq_fig_down, dpi=300, height=10, width=10, units="in", limitsize=FALSE)